This code produces the results for the question: Does what caregivers are doing predict how much they engage with their target children?

Load libraries and set theme

# Load analysis packages: tidyverse for wrangling/plotting, Hmisc/psych for
# descriptives, GGally/gridExtra/sjPlot for plots, ppcor for partial
# correlations, stargazer for regression tables.
library(tidyverse)
library(Hmisc)
library(GGally)
library(ppcor)
library(gridExtra)
library(psych)
library(sjPlot)
library(stargazer)

# Black-and-white ggplot theme for every figure in this report
theme_set(theme_bw())

Read in data

# demographics
# demographics
# Read the English- and Spanish-sample demographics and harmonize column
# names: id (child ID, as character), hi (household income measure),
# momed (maternal education).
# NOTE(review): the source columns differ across samples (HI24Final vs
# HI_18; Momed vs MomEd_25m) -- confirm the two measures are comparable.
demo_english <- read_csv("./data_demo_lena_transcripts/demo_english_ms.csv") %>% 
  rename(id = ID, hi = HI24Final, momed = Momed) %>% 
  dplyr::select(id, hi, momed) %>% 
  mutate(id = as.character(id), 
         language = "english")

demo_spanish <- read_csv("./data_demo_lena_transcripts/demo_spanish_ms.csv") %>% 
  rename(id = ID, hi = HI_18, momed = MomEd_25m) %>% 
  dplyr::select(id, hi, momed) %>% 
  mutate(id = as.character(id), 
         language = "spanish")

# Stack the two samples. bind_rows() (rather than rbind) matches columns by
# name instead of position and keeps the tibble class.
demo <- bind_rows(demo_english, demo_spanish)


# freq
# sum across segments, so there are only up to 4 data points per child (1 for each activity group)
# freq
# sum across segments, so there are only up to 4 data points per child (1 for each activity group)
# Collapse segment-level frequency data to one row per child x activity
# group x speaker. Activities are binned: books; cc = other child-centered
# (play/conv/food/routines); ac left as-is; non_tcds = gemods.
# "kwalods" rows and non-"all" speech rows are dropped.
freq_reg <- read_csv("./data_demo_lena_transcripts/freq.csv") %>% 
  filter(activity != "kwalods", 
         speech == "all") %>% 
  mutate(activity = recode(activity, "play" = "cc", "conv" = "cc", 
                           "food" = "cc", "routines" = "cc", "gemods" = "non_tcds")) %>% 
  mutate(id = factor(id), 
         language = factor(language)) %>% 
  group_by(id, activity, speaker) %>% # not grouping by segment number because looking across the whole tCDS hour
  # grouped mutate + distinct acts like summarise(): one row per group
  # carrying the group totals (tokens, types, duration in minutes)
  mutate(tokens_total_4act = sum(tokens), 
         types_total_4act = sum(types), 
         dur_min_4act = sum(dur_min)) %>% 
  distinct(id, activity, language, speaker, tokens_total_4act, types_total_4act, dur_min_4act) %>% 
  ungroup() %>% 
  # fix the display/reference order of the four activity bins
  mutate(activity = factor(activity, levels = c("books", "cc", "ac", "non_tcds"))) 

Create dfs for ADULTS and CHI

# subset for adult and child
freq_reg_adults <- filter(freq_reg, speaker == "ADULTS")  # adult speech rows only

freq_reg_child <- filter(freq_reg, speaker == "CHI")      # child speech rows only

# Sanity check: structure of the adult subset (should mirror the child subset)
str(freq_reg_adults)
## tibble[,7] [312 × 7] (S3: tbl_df/tbl/data.frame)
##  $ id               : Factor w/ 90 levels "7292","7352",..: 47 50 52 53 54 55 56 57 61 65 ...
##  $ activity         : Factor w/ 4 levels "books","cc","ac",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ speaker          : chr [1:312] "ADULTS" "ADULTS" "ADULTS" "ADULTS" ...
##  $ language         : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ tokens_total_4act: num [1:312] 294 588 438 4607 4463 ...
##  $ types_total_4act : num [1:312] 135 199 112 1323 1111 ...
##  $ dur_min_4act     : num [1:312] 10.12 4.71 6.76 39.92 32.1 ...
# Sanity check: child subset has the same 312 x 7 shape as the adult subset
str(freq_reg_child)
## tibble[,7] [312 × 7] (S3: tbl_df/tbl/data.frame)
##  $ id               : Factor w/ 90 levels "7292","7352",..: 47 50 52 53 54 55 56 57 61 65 ...
##  $ activity         : Factor w/ 4 levels "books","cc","ac",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ speaker          : chr [1:312] "CHI" "CHI" "CHI" "CHI" ...
##  $ language         : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ tokens_total_4act: num [1:312] 65 58 75 330 131 13 34 322 240 15 ...
##  $ types_total_4act : num [1:312] 34 17 36 181 47 7 16 124 89 9 ...
##  $ dur_min_4act     : num [1:312] 10.12 4.71 6.76 39.92 32.1 ...

Create tokens rate per hour - Children

# Child tokens-per-hour rate, pooled over all of the child's segments.
# NOTE(review): unlike the freq_reg pipeline above, this does NOT drop
# "kwalods" segments -- confirm that is intended.
freq_hr_child <- read_csv("./data_demo_lena_transcripts/freq.csv") %>% 
  dplyr::select(-X1) %>%   # drop the CSV's row-index column
  filter(speech == "all", 
         speaker == "CHI") %>% 
  group_by(id) %>% 
  # rate = total tokens / total observed hours
  mutate(tokens_sum_child = sum(tokens), 
         dur_hr = sum(dur_min)/60, 
         tokens_hr_child = tokens_sum_child/dur_hr) %>% 
  distinct(id, language, tokens_hr_child) %>%  # one row per child (assumes one language per id -- TODO confirm)
  ungroup() %>% 
  mutate(id = as.character(id))  # character id to match demo / join keys

Create wide df for regressions with 0s

# freq adult
# freq adult
# Wide regression df: one row per child with adult tokens/hr, child
# tokens/hr, and minutes in each of the 4 activity bins (0 when the child
# had no segments for that activity).
freq_reg_adults_wide <- freq_reg_adults %>% 
  ungroup() %>% 
  group_by(id) %>% 
  mutate(tokens_sum_adult = sum(tokens_total_4act), 
         dur_hr = sum(dur_min_4act)/60, 
         tokens_hr_adult = tokens_sum_adult/dur_hr) %>% 
  full_join(freq_hr_child, by = c("id", "language")) %>% 
  distinct(id, activity, language, dur_min_4act, tokens_hr_adult, tokens_hr_child) %>% 
  spread(activity, dur_min_4act) %>%  # NOTE(review): spread() is superseded; pivot_wider() is the modern equivalent
  # zero-fill missing activity minutes; note tokens_hr_child is NOT
  # zero-filled here and could remain NA after the full join -- TODO confirm
  replace_na(list(books = 0, cc = 0, ac = 0, non_tcds = 0)) %>% 
  # br_group: 1 = any book reading in the densest hour, 0 = none
  mutate(br_group = ifelse(books == 0, 0, 1), 
         br_group = as.factor(br_group))


# Distribution of the outcome: adult tokens per hour (n = 90 children)
psych::describe(freq_reg_adults_wide$tokens_hr_adult)
##    vars  n    mean      sd median trimmed    mad    min  max   range skew kurtosis     se
## X1    1 90 3005.01 1274.48 2831.2 2950.56 1172.3 366.26 8302 7935.74  0.8     2.03 134.34

Descriptives

# descriptives
# Histograms of minutes spent in each activity bin
hist(freq_reg_adults_wide$books)

hist(freq_reg_adults_wide$cc)

hist(freq_reg_adults_wide$ac)

hist(freq_reg_adults_wide$non_tcds)

# difference between families who BR vs. non
ggplot(freq_reg_adults_wide, aes(x = br_group, y = tokens_hr_adult)) + 
  geom_boxplot() + 
  geom_jitter(alpha = .5)

# t-tests
# NOTE(review): var.equal = TRUE assumes equal group variances; the Welch
# default would be more conservative -- confirm this choice.
t.test(tokens_hr_adult ~ br_group, 
       data = freq_reg_adults_wide, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  tokens_hr_adult by br_group
## t = -3.3415, df = 88, p-value = 0.001224
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1359.4204  -345.4796
## sample estimates:
## mean in group 0 mean in group 1 
##        2607.197        3459.647

Regressions (Full sample) - books* + cc* + ac + non-tCDS

# freq_reg_adults_wide_rmoutlier <- freq_reg_adults_wide %>% 
#   filter(id != 20056 & id != 7412)
# nrow(freq_reg_adults_wide_rmoutlier)

# models with tokens
# Nested model sequence: each model adds one predictor to the previous one,
# so the anova() call below tests each term's incremental contribution.
# update(.) expands ". ~ . + term", so the stored calls/formulas are
# identical to writing each lm() out in full.
m1 <- lm(tokens_hr_adult ~ tokens_hr_child,
         data = freq_reg_adults_wide)
m2 <- update(m1, . ~ . + language)
m3 <- update(m2, . ~ . + books)
m4 <- update(m3, . ~ . + cc)
m5 <- update(m4, . ~ . + ac)
m6 <- update(m5, . ~ . + non_tcds)

# Sequential (Type I) F-tests over the nested sequence: books and cc add
# significantly beyond child tokens and language; ac and non_tcds do not
anova(m1, m2, m3, m4, m5, m6)
## Analysis of Variance Table
## 
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + books
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + books + cc
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac
## Model 6: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac + 
##     non_tcds
##   Res.Df       RSS Df Sum of Sq       F    Pr(>F)    
## 1     88 125184941                                   
## 2     87 116398481  1   8786460  9.7597  0.002457 ** 
## 3     86  81225939  1  35172541 39.0686 1.682e-08 ***
## 4     85  75762325  1   5463615  6.0688  0.015825 *  
## 5     84  75413805  1    348520  0.3871  0.535520    
## 6     83  74722972  1    690833  0.7674  0.383564    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Preferred full-sample model: child tokens/hr + language + books + cc
summary(m4)
## 
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + language + books + 
##     cc, data = freq_reg_adults_wide)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1693.9  -577.2  -223.8   519.9  3434.6 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1774.6234   308.3866   5.755 1.34e-07 ***
## tokens_hr_child    1.3555     0.4259   3.183  0.00204 ** 
## languagespanish -619.4245   199.5598  -3.104  0.00259 ** 
## books             76.9906    11.6153   6.628 2.95e-09 ***
## cc                21.9120     8.8503   2.476  0.01528 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 944.1 on 85 degrees of freedom
## Multiple R-squared:  0.4759, Adjusted R-squared:  0.4513 
## F-statistic:  19.3 on 4 and 85 DF,  p-value: 2.52e-11
# checking if language is a moderator
m4 <- lm(tokens_hr_adult ~ tokens_hr_child + language + books + cc,
         data = freq_reg_adults_wide)
# add language x books, then additionally language x cc
m4_intx1 <- lm(tokens_hr_adult ~ tokens_hr_child + (language * books) + cc,
         data = freq_reg_adults_wide)
m4_intx2 <- lm(tokens_hr_adult ~ tokens_hr_child + (language * books) + (language * cc),
         data = freq_reg_adults_wide)

# neither interaction reaches .05 (language x books is marginal, p = .068)
anova(m4, m4_intx1, m4_intx2)
## Analysis of Variance Table
## 
## Model 1: tokens_hr_adult ~ tokens_hr_child + language + books + cc
## Model 2: tokens_hr_adult ~ tokens_hr_child + (language * books) + cc
## Model 3: tokens_hr_adult ~ tokens_hr_child + (language * books) + (language * 
##     cc)
##   Res.Df      RSS Df Sum of Sq      F  Pr(>F)  
## 1     85 75762325                              
## 2     84 72788793  1   2973532 3.4102 0.06836 .
## 3     83 72372375  1    416419 0.4776 0.49145  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Inspect the marginal language x books interaction model
summary(m4_intx1)
## 
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + (language * 
##     books) + cc, data = freq_reg_adults_wide)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1797.7  -605.6  -150.2   471.4  2859.9 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           1623.278    314.853   5.156 1.65e-06 ***
## tokens_hr_child          1.374      0.420   3.271  0.00156 ** 
## languagespanish       -389.510    232.639  -1.674  0.09779 .  
## books                   99.176     16.571   5.985 5.14e-08 ***
## cc                      22.690      8.736   2.597  0.01110 *  
## languagespanish:books  -41.563     22.437  -1.852  0.06748 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 930.9 on 84 degrees of freedom
## Multiple R-squared:  0.4965, Adjusted R-squared:  0.4665 
## F-statistic: 16.57 on 5 and 84 DF,  p-value: 2.366e-11
# # check on potential outlier - removing these didn't change pattern of findings
# # 20056 - possibly bc high densest hour; 7412 - less clear, but does have 3rd highest cc value
# cooksd <- cooks.distance(m4)
# View(cooksd)
# influential <- as.numeric(names(cooksd)[(cooksd > 4*mean(cooksd, na.rm=T))])  # influential row numbers
# inf_df <- head(freq_reg_adults_wide_rmoutlier[influential, ])  # influential observations
# View(inf_df)


# table
# Text regression table for m1-m4; custom cutoffs add a "." marker at
# p < .1 on top of the usual three star levels
stargazer(m1, m2, m3, m4, type = "text",
         star.char = c(".","*","**","***"),
         star.cutoffs = c(.1, .05, .01, .001),
         notes = c(". p<0.1; * p<0.05; ** p<0.01; *** p<0.001"),
         notes.append = F,
         digits = 3,
         dep.var.labels = c("Word Tokens in the Densest hour"),
         covariate.labels=c("Child Tokens per Hour", "Language Group",
                            "Book Sharing (min)",
                            "Other-Child-Centered Activities (min)"))
## 
## =================================================================================================================================
##                                                                           Dependent variable:                                    
##                                       -------------------------------------------------------------------------------------------
##                                                                     Word Tokens in the Densest hour                              
##                                                (1)                    (2)                    (3)                    (4)          
## ---------------------------------------------------------------------------------------------------------------------------------
## Child Tokens per Hour                        1.940***               1.845***               1.527***               1.355**        
##                                              (0.526)                (0.511)                (0.433)                (0.426)        
##                                                                                                                                  
## Language Group                                                     -626.536*              -623.618**             -619.424**      
##                                                                    (244.485)              (205.418)              (199.560)       
##                                                                                                                                  
## Book Sharing (min)                                                                        71.738***              76.991***       
##                                                                                            (11.756)               (11.615)       
##                                                                                                                                  
## Other-Child-Centered Activities (min)                                                                             21.912*        
##                                                                                                                   (8.850)        
##                                                                                                                                  
## Constant                                   2,160.060***           2,514.431***           2,256.858***           1,774.623***     
##                                             (261.190)              (288.588)              (246.119)              (308.387)       
##                                                                                                                                  
## ---------------------------------------------------------------------------------------------------------------------------------
## Observations                                    90                     90                     90                     90          
## R2                                            0.134                  0.195                  0.438                  0.476         
## Adjusted R2                                   0.124                  0.176                  0.419                  0.451         
## Residual Std. Error                    1,192.710 (df = 88)    1,156.682 (df = 87)     971.848 (df = 86)      944.098 (df = 85)   
## F Statistic                           13.621*** (df = 1; 88) 10.525*** (df = 2; 87) 22.353*** (df = 3; 86) 19.297*** (df = 4; 85)
## =================================================================================================================================
## Note:                                                                                   . p<0.1; * p<0.05; ** p<0.01; *** p<0.001
# diagnostics
# Regression diagnostic plots (residuals, normality, etc.) for m4
plot_model(m4, type = "diag")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

Regressions (Full sample) - cc + books* + ac + non-tCDS

# models with tokens and cc before book sharing - cc doesn't add before book sharing
# Entry-order check: cc entered before books, to see whether the cc effect
# in m4 depends on entry order
m3.cc <- lm(tokens_hr_adult ~ tokens_hr_child + language + cc,
         data = freq_reg_adults_wide)

anova(m1, m2, m3.cc, m4, m5, m6)
## Analysis of Variance Table
## 
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + cc
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + books + cc
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac
## Model 6: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac + 
##     non_tcds
##   Res.Df       RSS Df Sum of Sq       F    Pr(>F)    
## 1     88 125184941                                   
## 2     87 116398481  1   8786460  9.7597  0.002457 ** 
## 3     86 114922558  1   1475922  1.6394  0.203973    
## 4     85  75762325  1  39160234 43.4980 3.706e-09 ***
## 5     84  75413805  1    348520  0.3871  0.535520    
## 6     83  74722972  1    690833  0.7674  0.383564    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# cc alone (without books) is not a significant predictor
summary(m3.cc)
## 
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + language + cc, 
##     data = freq_reg_adults_wide)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1764.9  -734.8  -213.7   677.6  5406.5 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2277.6447   365.9879   6.223 1.71e-08 ***
## tokens_hr_child    1.7695     0.5158   3.431 0.000927 ***
## languagespanish -624.5021   244.3464  -2.556 0.012352 *  
## cc                11.1971    10.6544   1.051 0.296230    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1156 on 86 degrees of freedom
## Multiple R-squared:  0.205,  Adjusted R-squared:  0.1773 
## F-statistic: 7.393 on 3 and 86 DF,  p-value: 0.0001828
# Repeat of the preferred model summary for side-by-side comparison
summary(m4)
## 
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + language + books + 
##     cc, data = freq_reg_adults_wide)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1693.9  -577.2  -223.8   519.9  3434.6 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1774.6234   308.3866   5.755 1.34e-07 ***
## tokens_hr_child    1.3555     0.4259   3.183  0.00204 ** 
## languagespanish -619.4245   199.5598  -3.104  0.00259 ** 
## books             76.9906    11.6153   6.628 2.95e-09 ***
## cc                21.9120     8.8503   2.476  0.01528 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 944.1 on 85 degrees of freedom
## Multiple R-squared:  0.4759, Adjusted R-squared:  0.4513 
## F-statistic:  19.3 on 4 and 85 DF,  p-value: 2.52e-11
# Full model with all four activity durations entered
summary(m6)
## 
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + language + books + 
##     cc + ac + non_tcds, data = freq_reg_adults_wide)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1501.6  -565.1  -220.8   469.5  3389.3 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)     -3063.4274  5305.5891  -0.577  0.56523   
## tokens_hr_child     1.3203     0.4302   3.069  0.00290 **
## languagespanish  -664.6020   209.9614  -3.165  0.00217 **
## books             159.0264    89.3238   1.780  0.07868 . 
## cc                103.4476    89.0727   1.161  0.24882   
## ac                 90.3713    91.5556   0.987  0.32648   
## non_tcds           78.4925    89.6045   0.876  0.38356   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 948.8 on 83 degrees of freedom
## Multiple R-squared:  0.4831, Adjusted R-squared:  0.4457 
## F-statistic: 12.93 on 6 and 83 DF,  p-value: 2.897e-10

Regressions (Full sample) - books* + cc* + non-tCDS + ac; AC does not add above non-tCDS

# Entry-order check: non_tcds entered before ac; ac adds nothing beyond it
m5.ods <- lm(tokens_hr_adult ~ tokens_hr_child + language + books + cc + non_tcds,
         data = freq_reg_adults_wide)

anova(m1, m2, m3, m4, m5.ods, m6)
## Analysis of Variance Table
## 
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + books
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + books + cc
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + books + cc + non_tcds
## Model 6: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac + 
##     non_tcds
##   Res.Df       RSS Df Sum of Sq       F    Pr(>F)    
## 1     88 125184941                                   
## 2     87 116398481  1   8786460  9.7597  0.002457 ** 
## 3     86  81225939  1  35172541 39.0686 1.682e-08 ***
## 4     85  75762325  1   5463615  6.0688  0.015825 *  
## 5     84  75600109  1    162216  0.1802  0.672313    
## 6     83  74722972  1    877137  0.9743  0.326480    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Regressions (BR only families) - books* + cc + ac + non-tCDS; same if switch cc and books

# models with ONLY BR families
# Subsample: families with any book reading (br_group == 1) in the densest
# hour, refitting the same nested model sequence as the full sample.
bronly_tokens <- filter(freq_reg_adults_wide, br_group == 1)


# regressions (update(.) expands ". ~ . + term", so the stored calls are
# identical to spelling each lm() out in full)
m1.bronly <- lm(tokens_hr_adult ~ tokens_hr_child,
                data = bronly_tokens)
m2.bronly <- update(m1.bronly, . ~ . + language)
m3.bronly <- update(m2.bronly, . ~ . + books)
m4.bronly <- update(m3.bronly, . ~ . + cc)
m5.bronly <- update(m4.bronly, . ~ . + ac)
m6.bronly <- update(m5.bronly, . ~ . + non_tcds)

# Sequential F-tests within BR families: only books adds beyond language
anova(m1.bronly, m2.bronly, m3.bronly, m4.bronly, m5.bronly, m6.bronly)
## Analysis of Variance Table
## 
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + books
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + books + cc
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac
## Model 6: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac + 
##     non_tcds
##   Res.Df      RSS Df Sum of Sq       F    Pr(>F)    
## 1     40 71375267                                   
## 2     39 66449426  1   4925841  4.7571   0.03598 *  
## 3     38 40082948  1  26366478 25.4633 1.396e-05 ***
## 4     37 39680008  1    402941  0.3891   0.53680    
## 5     36 38279155  1   1400852  1.3529   0.25265    
## 6     35 36241501  1   2037654  1.9679   0.16948    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Preferred BR-only model: books remains a strong predictor
summary(m3.bronly)
## 
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + language + books, 
##     data = bronly_tokens)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1505.9  -784.9  -204.2   566.6  3003.4 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2351.6180   424.7781   5.536 2.46e-06 ***
## tokens_hr_child    0.9960     0.6479   1.537    0.132    
## languagespanish -748.2907   318.1371  -2.352    0.024 *  
## books             83.1373    16.6287   5.000 1.33e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1027 on 38 degrees of freedom
## Multiple R-squared:  0.4519, Adjusted R-squared:  0.4086 
## F-statistic: 10.44 on 3 and 38 DF,  p-value: 3.792e-05
# checking if language is a moderator
m3.bronly_intx <- lm(tokens_hr_adult ~ tokens_hr_child + (language * books),
         data = bronly_tokens)
# language x books interaction is marginal (p = .084); not retained
anova(m3.bronly, m3.bronly_intx)
## Analysis of Variance Table
## 
## Model 1: tokens_hr_adult ~ tokens_hr_child + language + books
## Model 2: tokens_hr_adult ~ tokens_hr_child + (language * books)
##   Res.Df      RSS Df Sum of Sq      F  Pr(>F)  
## 1     38 40082948                              
## 2     37 36928936  1   3154013 3.1601 0.08368 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# table
# Text regression table for the BR-only nested models (same custom
# significance markers as the full-sample table)
stargazer(m1.bronly, m2.bronly, m3.bronly, type = "text",
         star.char = c(".","*","**","***"),
         star.cutoffs = c(.1, .05, .01, .001),
         notes = c(". p<0.1; * p<0.05; ** p<0.01; *** p<0.001"),
         notes.append = F,
         digits = 3,
         dep.var.labels = c("Word Tokens in the Densest hour"),
         covariate.labels=c("Child Tokens per Hour", "Language Group",
                            "Book Sharing (min)"))
## 
## ====================================================================================
##                                            Dependent variable:                      
##                       --------------------------------------------------------------
##                                      Word Tokens in the Densest hour                
##                               (1)                 (2)                  (3)          
## ------------------------------------------------------------------------------------
## Child Tokens per Hour        0.834               0.919                0.996         
##                             (0.841)             (0.823)              (0.648)        
##                                                                                     
## Language Group                                 -686.979.            -748.291*       
##                                                (404.033)            (318.137)       
##                                                                                     
## Book Sharing (min)                                                  83.137***       
##                                                                      (16.629)       
##                                                                                     
## Constant                 3,054.460***        3,340.207***          2,351.618***     
##                            (457.731)           (477.810)            (424.778)       
##                                                                                     
## ------------------------------------------------------------------------------------
## Observations                  42                  42                    42          
## R2                           0.024               0.091                0.452         
## Adjusted R2                 -0.0004              0.045                0.409         
## Residual Std. Error   1,335.807 (df = 40) 1,305.309 (df = 39)  1,027.042 (df = 38)  
## F Statistic           0.983 (df = 1; 40)  1.960 (df = 2; 39)  10.443*** (df = 3; 38)
## ====================================================================================
## Note:                                      . p<0.1; * p<0.05; ** p<0.01; *** p<0.001
# plot model
# Predicted adult tokens/hr by book-sharing minutes (BR-only model), raw
# data overlaid; aes(color = group) recolors the prediction-line group
plot_model(m3.bronly, type = "pred", terms = c("books"),
           show.data = T,
           dot.size = 4, 
           colors = "darkviolet") +
  aes(color = group) + 
  font_size(axis_title.x = 25, axis_title.y = 25, labels.x = 18, labels.y = 18) + 
  labs(x = "Book Sharing Duration (min)", y = "Caregiver\nTotal Word Tokens\n(Densest tCDS Hour)", title = "") +
  theme(axis.title.y = element_text(angle = 0, vjust = 0.5), 
        legend.position = "none")

# ggsave() with no plot argument saves the most recently displayed plot
ggsave("./figures/balloon_bronly.pdf", width = 12, height = 8, units = "in", dpi = 300)




# model with ONLY BR families and cc first
# Entry-order check within BR families: cc before books adds nothing
m3.bronly.cc <- lm(tokens_hr_adult ~ tokens_hr_child + language + cc,
         data = bronly_tokens)

anova(m1.bronly, m2.bronly, m3.bronly.cc, m4.bronly, m5.bronly, m6.bronly)
## Analysis of Variance Table
## 
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + cc
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + books + cc
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac
## Model 6: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac + 
##     non_tcds
##   Res.Df      RSS Df Sum of Sq       F    Pr(>F)    
## 1     40 71375267                                   
## 2     39 66449426  1   4925841  4.7571   0.03598 *  
## 3     38 65524463  1    924963  0.8933   0.35107    
## 4     37 39680008  1  25844455 24.9591 1.624e-05 ***
## 5     36 38279155  1   1400852  1.3529   0.25265    
## 6     35 36241501  1   2037654  1.9679   0.16948    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# model with ONLY BR families and non_tCDS above ac
# Entry-order check: non_tcds before ac within BR families
m5.bronly.ods <- lm(tokens_hr_adult ~ tokens_hr_child + language + books + cc + non_tcds,
         data = bronly_tokens)

anova(m1.bronly, m2.bronly, m3.bronly, m4.bronly, m5.bronly.ods, m6.bronly)
## Analysis of Variance Table
## 
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + books
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + books + cc
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + books + cc + non_tcds
## Model 6: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac + 
##     non_tcds
##   Res.Df      RSS Df Sum of Sq       F    Pr(>F)    
## 1     40 71375267                                   
## 2     39 66449426  1   4925841  4.7571   0.03598 *  
## 3     38 40082948  1  26366478 25.4633 1.396e-05 ***
## 4     37 39680008  1    402941  0.3891   0.53680    
## 5     36 39081454  1    598553  0.5780   0.45217    
## 6     35 36241501  1   2839953  2.7427   0.10664    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# diagnostics
# Regression diagnostic plots for the BR-only model
plot_model(m3.bronly, type = "diag")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

Regressions (NON BR only families) - cc* + ac + non-tCDS; same if switch cc and books

# models with NON BR families
# Subsample: families with zero book-reading minutes; the books term is
# omitted because it is constant (0) for everyone here.
nonbr_tokens <- filter(freq_reg_adults_wide, br_group == 0)

# regressions (update(.) expands ". ~ . + term", so the stored calls are
# identical to spelling each lm() out in full)
m1.nonbr <- lm(tokens_hr_adult ~ tokens_hr_child,
               data = nonbr_tokens)
m2.nonbr <- update(m1.nonbr, . ~ . + language)
m3.nonbr <- update(m2.nonbr, . ~ . + cc)
m4.nonbr <- update(m3.nonbr, . ~ . + ac)
m5.nonbr <- update(m4.nonbr, . ~ . + non_tcds)

# Sequential F-tests in non-BR families: cc adds significantly; ac and
# non_tcds do not
anova(m1.nonbr, m2.nonbr, m3.nonbr, m4.nonbr, m5.nonbr)
## Analysis of Variance Table
## 
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + cc
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + cc + ac
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + cc + ac + non_tcds
##   Res.Df      RSS Df Sum of Sq      F  Pr(>F)  
## 1     46 39920314                              
## 2     45 37515133  1   2405181 3.1538 0.08299 .
## 3     44 32693714  1   4821419 6.3222 0.01584 *
## 4     43 32550184  1    143529 0.1882 0.66664  
## 5     42 32030174  1    520010 0.6819 0.41361  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Preferred non-BR model: cc predicts adult tokens/hr
summary(m3.nonbr)
## 
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + language + cc, 
##     data = nonbr_tokens)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1283.63  -578.33   -95.85   455.84  1863.78 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1473.962    353.865   4.165 0.000143 ***
## tokens_hr_child    1.955      0.581   3.364 0.001599 ** 
## languagespanish -455.133    253.398  -1.796 0.079343 .  
## cc                24.509      9.621   2.547 0.014427 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 862 on 44 degrees of freedom
## Multiple R-squared:  0.4072, Adjusted R-squared:  0.3668 
## F-statistic: 10.08 on 3 and 44 DF,  p-value: 3.568e-05
# Adding ac does not improve the non-BR model
summary(m4.nonbr)
## 
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + language + cc + 
##     ac, data = nonbr_tokens)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1289.10  -627.14   -65.08   469.58  1932.32 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)     1637.5147   518.3136   3.159  0.00289 **
## tokens_hr_child    2.0199     0.6052   3.337  0.00175 **
## languagespanish -426.3603   264.1618  -1.614  0.11384   
## cc                20.8609    12.8250   1.627  0.11113   
## ac                -9.8591    22.6418  -0.435  0.66542   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 870 on 43 degrees of freedom
## Multiple R-squared:  0.4098, Adjusted R-squared:  0.3549 
## F-statistic: 7.465 on 4 and 43 DF,  p-value: 0.0001168
# checking if language is a moderator
m3.nonbr_intx <- lm(tokens_hr_adult ~ tokens_hr_child + (language * cc),
         data = nonbr_tokens)
# language x cc interaction is not significant (p = .121)
anova(m3.nonbr, m3.nonbr_intx)
## Analysis of Variance Table
## 
## Model 1: tokens_hr_adult ~ tokens_hr_child + language + cc
## Model 2: tokens_hr_adult ~ tokens_hr_child + (language * cc)
##   Res.Df      RSS Df Sum of Sq      F Pr(>F)
## 1     44 32693714                           
## 2     43 30895448  1   1798266 2.5028  0.121
# table: side-by-side text table for the four hierarchical models
# (m1.nonbr through m4.nonbr, non-book-reading families). Custom star
# characters/cutoffs report the marginal "." level alongside the usual
# stars; notes.append = FALSE replaces stargazer's default note with the
# matching custom legend instead of appending to it.
stargazer(m1.nonbr, m2.nonbr, m3.nonbr, m4.nonbr, type = "text",
          star.char = c(".", "*", "**", "***"),
          star.cutoffs = c(.1, .05, .01, .001),
          notes = c(". p<0.1; * p<0.05; ** p<0.01; *** p<0.001"),
          notes.append = FALSE,  # use TRUE/FALSE, not T/F (T/F are reassignable)
          digits = 3,
          dep.var.labels = c("Word Tokens in the Densest hour"),
          covariate.labels = c("Child Tokens per Hour", "Language Group",
                               "Other Child-Centered Activities (min)",
                               "Adult-Centered Activities (min)"))
## 
## ================================================================================================================================
##                                                                          Dependent variable:                                    
##                                       ------------------------------------------------------------------------------------------
##                                                                    Word Tokens in the Densest hour                              
##                                                (1)                    (2)                    (3)                    (4)         
## --------------------------------------------------------------------------------------------------------------------------------
## Child Tokens per Hour                        2.506***               2.319***               1.955**                2.020**       
##                                              (0.598)                (0.596)                (0.581)                (0.605)       
##                                                                                                                                 
## Language Group                                                     -455.901.              -455.133.              -426.360       
##                                                                    (268.407)              (253.398)              (264.162)      
##                                                                                                                                 
## Other Child-Centered Activities (min)                                                      24.509*                20.861        
##                                                                                            (9.621)               (12.825)       
##                                                                                                                                 
## Adult-Centered Activities (min)                                                                                   -9.859        
##                                                                                                                  (22.642)       
##                                                                                                                                 
## Constant                                   1,626.145***           1,936.715***           1,473.962***           1,637.515**     
##                                             (270.010)              (321.664)              (353.866)              (518.314)      
##                                                                                                                                 
## --------------------------------------------------------------------------------------------------------------------------------
## Observations                                    48                     48                     48                    48          
## R2                                            0.276                  0.320                  0.407                  0.410        
## Adjusted R2                                   0.260                  0.290                  0.367                  0.355        
## Residual Std. Error                     931.575 (df = 46)      913.055 (df = 45)      861.997 (df = 44)      870.047 (df = 43)  
## F Statistic                           17.555*** (df = 1; 46) 10.580*** (df = 2; 45) 10.076*** (df = 3; 44) 7.465*** (df = 4; 43)
## ================================================================================================================================
## Note:                                                                                  . p<0.1; * p<0.05; ** p<0.01; *** p<0.001
# plot model: predicted caregiver tokens (densest tCDS hour) as a
# function of other child-centered activity minutes (m3.nonbr), with the
# raw data points overlaid (show.data = TRUE) and recolored by group via
# the aes(color = group) override; legend suppressed since the caption
# explains the grouping.
plot_model(m3.nonbr, type = "pred", terms = c("cc"),
           show.data = TRUE,  # use TRUE/FALSE, not T/F (T/F are reassignable)
           dot.size = 4,
           colors = "firebrick1") +
  aes(color = group) + 
  font_size(axis_title.x = 25, axis_title.y = 25, labels.x = 18, labels.y = 18) + 
  labs(x = "Other Child-Centered Duration (min)", y = "Caregiver\nTotal Word Tokens\n(Densest tCDS Hour)", title = "") +
  theme(axis.title.y = element_text(angle = 0, vjust = 0.5), 
        legend.position = "none")

# ggsave with no plot argument writes the last plot displayed (the one above)
ggsave("./figures/balloon_nonbronly.pdf", width = 12, height = 8, units = "in", dpi = 300)



# model with NON BR families and ac above cc; p = .06
# Sensitivity check on order of entry in the hierarchical anova: when ac
# enters before cc (Model 3 here), ac is only marginal (p = .055) and cc
# then adds nothing (p = .113) — the mirror image of the cc-first
# ordering, as expected when the two predictors share variance.
m3.nonbr.ac <- lm(tokens_hr_adult ~ tokens_hr_child + language + ac,
         data = nonbr_tokens)

anova(m1.nonbr, m2.nonbr, m3.nonbr.ac, m4.nonbr, m5.nonbr)
## Analysis of Variance Table
## 
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + ac
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + cc + ac
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + cc + ac + non_tcds
##   Res.Df      RSS Df Sum of Sq      F  Pr(>F)  
## 1     46 39920314                              
## 2     45 37515133  1   2405181 3.1538 0.08299 .
## 3     44 34552984  1   2962148 3.8842 0.05536 .
## 4     43 32550184  1   2002800 2.6262 0.11260  
## 5     42 32030174  1    520010 0.6819 0.41361  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# NOTE(review): this repeats the summary(m4.nonbr) shown earlier —
# presumably kept here so the full m4 coefficients sit next to the
# ac-first anova above for easy comparison; could be dropped otherwise.
summary(m4.nonbr)
## 
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + language + cc + 
##     ac, data = nonbr_tokens)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1289.10  -627.14   -65.08   469.58  1932.32 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)     1637.5147   518.3136   3.159  0.00289 **
## tokens_hr_child    2.0199     0.6052   3.337  0.00175 **
## languagespanish -426.3603   264.1618  -1.614  0.11384   
## cc                20.8609    12.8250   1.627  0.11113   
## ac                -9.8591    22.6418  -0.435  0.66542   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 870 on 43 degrees of freedom
## Multiple R-squared:  0.4098, Adjusted R-squared:  0.3549 
## F-statistic: 7.465 on 4 and 43 DF,  p-value: 0.0001168
# model with NON BR families and gemods above ac
# Second order-of-entry check: entering non_tcds (other-directed speech
# minutes) before ac. non_tcds adds nothing over child tokens, language,
# and cc (p = .77), and ac adds nothing after it (p = .38) — so cc is
# the only activity predictor that carries signal in these data.
m4.nonbr.ods <- lm(tokens_hr_adult ~ tokens_hr_child + language + cc + non_tcds,
         data = nonbr_tokens)

anova(m1.nonbr, m2.nonbr, m3.nonbr, m4.nonbr.ods, m5.nonbr)
## Analysis of Variance Table
## 
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + cc
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + cc + non_tcds
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + cc + ac + non_tcds
##   Res.Df      RSS Df Sum of Sq      F  Pr(>F)  
## 1     46 39920314                              
## 2     45 37515133  1   2405181 3.1538 0.08299 .
## 3     44 32693714  1   4821419 6.3222 0.01584 *
## 4     43 32628821  1     64892 0.0851 0.77195  
## 5     42 32030174  1    598647 0.7850 0.38067  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# correlations: do cc and ac minutes trade off within the recorded hour?
# Scatterplot with per-language-group linear fits, then a Pearson test
# across all families. The two durations are strongly negatively
# correlated (r = -.62, p < .001), which explains why cc and ac cannot
# both stay significant when entered together in m4.
ggplot(data = nonbr_tokens,
       mapping = aes(x = cc, y = ac, color = language)) +
  geom_point() +
  geom_smooth(method = "lm")

cor.test(nonbr_tokens$cc, nonbr_tokens$ac)
## 
##  Pearson's product-moment correlation
## 
## data:  nonbr_tokens$cc and nonbr_tokens$ac
## t = -5.3799, df = 46, p-value = 2.432e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7696821 -0.4096571
## sample estimates:
##        cor 
## -0.6214557
# diagnostics
# sjPlot regression diagnostics for the retained model (m3.nonbr);
# returns a list of four diagnostic plots printed in sequence.
# NOTE(review): the exact panels (e.g. QQ plot, residual normality,
# homoscedasticity) depend on the sjPlot version — confirm if citing.
plot_model(m3.nonbr, type = "diag")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]